Code
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
source('dependencies.R')[1] 112
Data of 112 schools performance on Next Generation HS Introductory Physics exam in Spring of 2022 and Spring of 2023. 87 of those schools tested students in both years and 25 of those schools only tested students in 1 of the 2 testing years.
## Compute % Earned by Practice Category
## Compute State % Earned by Practice Category
## Compute SD by Practice Category
## Compute State % Earned by Reporting Category
## Compute SD by Reporting Category
Practice_Cat_Sum <- HSPhy_NextGen_SchoolIT301DF%>%
group_by(`Practice Category`)%>%
summarise(`PC Mean Diff` = mean(`School-State Diff`),
`PC Med Diff` = median(`School-State Diff`),
`PC SD Diff` = sd(`School-State Diff`))
Practice_Cat_Sum#view(LowEStateDF)
LowEPractice<-LowEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
LowEPractice%>%
group_by(`Practice Category`)%>%
summarise(`Low E PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`Low E PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`Low E PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)#view(HighEStateDF)
HighEPractice<-HighEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
HighEPractice%>%
group_by(`Practice Category`)%>%
summarise(`High E PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`High E PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`High E PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)#view(HighEStateDF)
HighMEPractice<-HighMEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
HighMEPractice%>%
group_by(`Practice Category`)%>%
summarise(`High ME PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`High ME PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`High ME PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)#view(LowEStateDF)
LowMEPractice<-LowMEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
LowMEPractice%>%
group_by(`Practice Category`)%>%
summarise(`Low ME PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`Low ME PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`Low ME PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)LowEHighMStateDF<-HSPhy_NextGen_StateAchievementDF%>%
mutate(`MHigh` = case_when(`Performance Level` == "M" & `Performance Diff` >= 0 ~ TRUE,
`Performance Level` == "M" & `Performance Diff` < 0 ~ FALSE))%>%
mutate(`ELow` = case_when(`Performance Level` == "E" & `Performance Diff` < 0 ~ TRUE,
`Performance Level` == "E" & `Performance Diff` >= 0 ~ FALSE))%>%
mutate(`EMOpp` = case_when(`MHigh` == TRUE && `ELow` == TRUE ~ TRUE))#%>%
#filter(`EMOpp` == TRUE)#%>%
#filter(`MHigh` == TRUE)
view(LowEHighMStateDF)---
title: "Data Cleaning"
format:
html:
df-print: paged
toc: true
code-fold: true
code-copy: true
code-tools: true
editor:
markdown:
wrap: 72
---
# Load libraries
```{r}
#| label: setup
#| warning: false
#| message: false
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
source('dependencies.R')
```
# Read in Data Sets
## State Item Analysis Report
```{r}
HSPhy_2022_StateItemDF<-read_state_item("data/2022_Physics_District_NextGenMCASItem.xlsx", 2022, "PHY" )
HSPhy_2022_StateItemDF
HSPhy_2023_StateItemDF<-read_state_item("data/2023_Physics_District_NextGenMCASItem.xlsx", 2023, "PHY" )
HSPhy_2023_StateItemDF
```
## School Item Analysis Reports
```{r}
HSPhy_2022_SchoolItemDF<-read_school_item("data/2022_Physics_NextGenMCASItem.xlsx", 2022, "PHY")
HSPhy_2022_SchoolItemDF
HSPhy_2022_SchoolItemDF<-HSPhy_2022_SchoolItemDF%>%
left_join(HSPhy_2022_StateItemDF, by= c('Year'='Year', 'Subject'='Subject', 'ITEM' = 'ITEM'))%>%
mutate(`School-State Diff` = `School%`- `State%`)
HSPhy_2022_SchoolItemDF
HSPhy_2023_SchoolItemDF<-read_school_item("data/2023_Physics_NextGenMCASItem.xlsx", 2023, "PHY")
HSPhy_2023_SchoolItemDF
HSPhy_2023_SchoolItemDF<-HSPhy_2023_SchoolItemDF%>%
left_join(HSPhy_2023_StateItemDF, by= c('Year'='Year', 'Subject'='Subject', 'ITEM' = 'ITEM'))%>%
mutate(`School-State Diff` = `School%`- `State%`)
HSPhy_2023_SchoolItemDF
tail(HSPhy_2023_SchoolItemDF)
tail(HSPhy_2022_SchoolItemDF)
HSPhy_NextGen_SchoolItemDF <- rbind(HSPhy_2022_SchoolItemDF, HSPhy_2023_SchoolItemDF)
HSPhy_NextGen_SchoolItemDF
```
## State NextGen Achievement Reports
```{r}
HSPhy_2022_StateAchievementDF<-read_state_achievement("data/2022_HSSci_NextGenMCAS.xlsx", 2022, "PHY" )
HSPhy_2022_StateAchievementDF
HSPhy_2023_StateAchievementDF<-read_state_achievement("data/2023_HSSci_NextGenMCAS.xlsx", 2023, "PHY" )
HSPhy_2023_StateAchievementDF
HSPhy_NextGen_StateAchievementDF<-rbind(HSPhy_2022_StateAchievementDF, HSPhy_2023_StateAchievementDF)
HSPhy_NextGen_StateAchievementDF
length(unique(HSPhy_NextGen_StateAchievementDF$`School Code`))
# HSPhy_NextGen_StateAchievementDF%>%
# group_by(`School Code`, `Performance Level`)%>%
# summarize(`Performance Count` = sum(`Performance Count`, na.rm = TRUE),
# `Tested Students` = sum(`Tested Students`, na.rm = TRUE),
# )
```
## IT301 Reports
```{r}
SG9_standardXWalk<-read_excel("data/NextGenMCASItemxWalk.xlsx", sheet = "HS_Phys_StandardxWalk")
IT301_test<-read_excel("data/2023_Physics_IT301 MCAS District and School Test Item Analysis Summary.xlsx", skip = 14)
IT301_test
IT301_2022<-read_IT301("data/2022_Physics_IT301 MCAS District and School Test Item Analysis Summary.xlsx", 2022, "PHY" )
IT301_2022
IT301_2023<-read_IT301("data/2023_Physics_IT301 MCAS District and School Test Item Analysis Summary.xlsx", 2023, "PHY" )
IT301_2023
NextGenIT301<- rbind(IT301_2022, IT301_2023)
HSPhy_NextGenIT301<-NextGenIT301%>%
left_join(SG9_standardXWalk, by = "Standard")
HSPhy_NextGenIT301
```
## Join IT301 Summary reports to SchoolItem performance Reports
```{r}
HSPhy_NextGen_SchoolIT301DF <- left_join(HSPhy_NextGen_SchoolItemDF, HSPhy_NextGenIT301, by = c("Year" = "Year", "Subject" = "Subject", "ITEM" = "ITEM"))
HSPhy_NextGen_SchoolIT301DF
view(HSPhy_NextGen_SchoolIT301DF)
length(unique(HSPhy_NextGen_SchoolIT301DF$`School Name`))
```
## Compute Key Summary Stats for IT301 Reports
Data of 112 schools performance on Next Generation HS Introductory Physics exam in Spring of 2022 and Spring of 2023. 87 of those schools tested students in both years and 25 of those schools only tested students in 1 of the 2 testing years.
```{r}
## Compute % Earned by Practice Category
## Compute State % Earned by Practice Category
## Compute SD by Practice Category
## Compute State % Earned by Reporting Category
## Compute SD by Reporting Category
Practice_Cat_Sum <- HSPhy_NextGen_SchoolIT301DF%>%
group_by(`Practice Category`)%>%
summarise(`PC Mean Diff` = mean(`School-State Diff`),
`PC Med Diff` = median(`School-State Diff`),
`PC SD Diff` = sd(`School-State Diff`))
Practice_Cat_Sum
Practice_Cat_School_Sum <- HSPhy_NextGen_SchoolIT301DF%>%
group_by(`School Name`, `School Code`, `Practice Category`)%>%
summarise(`PC Mean Diff` = mean(`School-State Diff`),
`PC Med Diff` = median(`School-State Diff`),
`PC SD Diff` = sd(`School-State Diff`))
Practice_Cat_School_Sum
Reporting_Cat_Sum <- HSPhy_NextGen_SchoolIT301DF%>%
group_by(`Reporting Category`)%>%
summarise(`RC Mean Diff` = mean(`School-State Diff`),
`RC Med Diff` = median(`School-State Diff`),
`RC SD Diff` = sd(`School-State Diff`))
Reporting_Cat_Sum
Reporting_Cat_School_Sum <- HSPhy_NextGen_SchoolIT301DF%>%
group_by(`School Name`, `School Code`, `Reporting Category`)%>%
summarise(`RC Mean Diff` = mean(`School-State Diff`),
`RC Med Diff` = median(`School-State Diff`),
`RC SD Diff` = sd(`School-State Diff`))
Reporting_Cat_School_Sum
```
## State E below average with Practice Category
```{r}
# State E below average
HSPhy_NextGen_StateAchievementDF
LowEStateDF<- HSPhy_NextGen_StateAchievementDF%>%
filter(`Performance Level` == "E")%>%
filter(`Performance Diff` < 0)%>%
mutate(`ELow` = TRUE)
LowEStateDF
#view(LowEStateDF)
LowEPractice<-LowEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
LowEPractice%>%
group_by(`Practice Category`)%>%
summarise(`Low E PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`Low E PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`Low E PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)
```
## State E above average with Practice Category
```{r}
HSPhy_NextGen_StateAchievementDF
HighEStateDF<- HSPhy_NextGen_StateAchievementDF%>%
filter(`Performance Level` == "E")%>%
filter(`Performance Diff` > 0)%>%
mutate(`ELow` = FALSE)
HighEStateDF
#view(HighEStateDF)
HighEPractice<-HighEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
HighEPractice%>%
group_by(`Practice Category`)%>%
summarise(`High E PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`High E PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`High E PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)
```
## State E and M above average
```{r}
HSPhy_NextGen_StateAchievementDF
HighMEStateDF<- HSPhy_NextGen_StateAchievementDF%>%
filter(`Performance Level` == "E" | `Performance Level` == "M")%>%
filter(`Performance Diff` > 0)%>%
mutate(`MELow` = FALSE)
HighMEStateDF
#view(HighEStateDF)
HighMEPractice<-HighMEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
HighMEPractice%>%
group_by(`Practice Category`)%>%
summarise(`High ME PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`High ME PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`High ME PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)
```
## State E or M below average
```{r}
HSPhy_NextGen_StateAchievementDF
LowMEStateDF<- HSPhy_NextGen_StateAchievementDF%>%
filter(`Performance Level` == "E" | `Performance Level` == "M")%>%
filter(`Performance Diff` < 0)%>%
mutate(`MELow` = FALSE)
LowMEStateDF
#view(LowEStateDF)
LowMEPractice<-LowMEStateDF%>%
select(`Year`, `Subject`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`, `State Avg. Scaled Score`)%>%
left_join(Practice_Cat_School_Sum, by = c( "School Code" = "School Code"))%>%
select(`Year`, `School Name`, `School Code`, `Tested Students`, `Performance Level`, `Performance Count`, `Performance%`, `State Performance%`, `Performance Diff`, `Avg. Scaled Score`,
`State Avg. Scaled Score`, `Practice Category`, `PC Mean Diff`, `PC Med Diff`, `PC SD Diff`)
LowMEPractice%>%
group_by(`Practice Category`)%>%
summarise(`Low ME PC Mean Diff` = mean(`PC Mean Diff`, na.rm = TRUE),
`Low ME PC Med Diff` = median(`PC Med Diff`, na.rm = TRUE),
`Low ME PC SD Diff` = sd(`PC SD Diff`, na.rm = TRUE)
)
```
## State E below, M above
```{r}
HSPhy_NextGen_StateAchievementDF
LowEHighMStateDF<-HSPhy_NextGen_StateAchievementDF%>%
mutate(`MHigh` = case_when(`Performance Level` == "M" & `Performance Diff` >= 0 ~ TRUE,
`Performance Level` == "M" & `Performance Diff` < 0 ~ FALSE))%>%
mutate(`ELow` = case_when(`Performance Level` == "E" & `Performance Diff` < 0 ~ TRUE,
`Performance Level` == "E" & `Performance Diff` >= 0 ~ FALSE))%>%
mutate(`EMOpp` = case_when(`MHigh` == TRUE && `ELow` == TRUE ~ TRUE))#%>%
#filter(`EMOpp` == TRUE)#%>%
#filter(`MHigh` == TRUE)
view(LowEHighMStateDF)
```